Datos de Carpobrotus edulis en NaturalistaUY

Author

Florencia Grattarola

Published

May 29, 2023

Descarga de datos de Carpobrotus edulis

Para descargar datos de NaturalistaUY usé la API de iNaturalist. Los datos se descargan considerando:

  • Uruguay como localización: place_id=7259
  • Carpobrotus edulis como taxón: taxon_id=49322
  • Registros con Grado de Investigación: quality_grade=research
library(httr)
library(jsonlite)
library(tidyverse)

getCarpobrotusObservations <- function(place_id, taxon_id, quality_grade){
    
  call <- str_glue('https://api.inaturalist.org/v1/observations?place_id={place_id}&taxon_id={taxon_id}&per_page=200&quality_grade={quality_grade}')
  
  get_json_call <- GET(url = call) %>%
      content(as = "text") %>% fromJSON(flatten = TRUE)
  
  if (!is.null(get_json_call)) {
      results <- as_tibble(get_json_call$results)
      results <- results %>% 
      select(taxon.name, taxon.rank, identifications_count, observed_on, 
                  geojson.coordinates, positional_accuracy,
                  user.login, user.id, user.name, user.observations_count,
                  user.identifications_count, user.activity_count, 
                  license_code, num_identification_agreements, uri) %>%
      unnest_wider(geojson.coordinates, names_sep = "_") %>%
      rename(longitude=geojson.coordinates_1, latitude=geojson.coordinates_2)
    }
  return(results)
}

datos_carpobrotus <- getCarpobrotusObservations(place_id=7259,
                                                taxon_id=49322,
                                      quality_grade='research')

Análisis preliminar

Este es un resumen inicial de los datos disponibles al día de hoy 2023-05-29.

Mapas

Para descargar los mapas usé el paquete geouy, haciendo: geouy::load_geouy('Dptos'). Guardé este objeto para evitar descargarlo nuevamente. El archivo tiene como CRS EPSG:32721.

Code
library(lubridate)
library(geonames)
options(geonamesUsername="biodiversidata") # A (free) username is required and rate limits exist
library(tmap)
tmap_mode("view")
library(sf)
sf::sf_use_s2(FALSE)
# options
options(scipen = 999)

uruguay <- readRDS('data/Uruguay.rds')
deptos_costeros <- c('MONTEVIDEO','MALDONADO','CANELONES', 'ROCHA')
costa_uruguay <- uruguay %>% filter(nombre %in% deptos_costeros)

getStateProvince <- function(lat, lng){
  subdivision <- try(GNcountrySubdivision(lat, lng, radius = "1", maxRows = 1), silent = TRUE)
  Sys.sleep(1.0)
  if(class(subdivision)=='try-error'){
    subdivision$adminName1 <- NA
  }
  else if (length(subdivision$adminName1)==0){
    subdivision$adminName1 <- NA
  }
  return(subdivision$adminName1)
}

datos_carpobrotus<- datos_carpobrotus %>%
  mutate(stateProvince=map2_chr(latitude, longitude, getStateProvince)) %>% 
  mutate(observed_on=as_date(observed_on)) %>% 
  mutate(season=lubridate::quarter(observed_on)) %>% 
  mutate(season=ifelse(season==1, 'verano', 
                       ifelse(season==2, 'otoño', 
                              ifelse(season==3, 'invierno', 'primavera'))))

saveRDS(datos_carpobrotus, 'data/datos_carpobrotus.rds')
write_excel_csv(datos_carpobrotus, 'data/datos_carpobrotus.csv', na = '')

sf_carpobrotus <- datos_carpobrotus %>% 
    st_as_sf(coords = c("longitude", "latitude")) %>% 
    st_set_crs(4326) %>% 
    st_transform(32721)

mapa.carpobrotus <- tm_graticules(alpha = 0.3) +
    tm_shape(costa_uruguay) +
    tm_fill(col='grey90') +
    tm_borders(col='grey60', alpha = 0.4) +
    tm_shape(sf_carpobrotus) +
    tm_dots(alpha = 0.4)  

mapa.carpobrotus

Más Detalles

Un total de 75 usuaries llevan hechos 200 registros de Carpobrotus edulis. El primer registro es de 2008-03-16 y el último es de 2023-05-09.

  • Datos por departamento
Code
library(knitr)

datos_carpobrotus %>% 
  group_by(stateProvince) %>%
  count() %>%  rename(Departamento=stateProvince, 
                                     `Cantidad de registros`=n) %>% 
  kable()
Departamento Cantidad de registros
Canelones 44
Maldonado 97
Montevideo 1
Montevideo Department 11
Rocha 33
Rocha Department 14
  • Estacionalidad
Code
library(patchwork)

timeline.plot <- datos_carpobrotus %>% 
    add_count(taxon.name, year=year(observed_on), 
              name='records_per_year') %>% 
    ggplot(., aes(x=observed_on, y=records_per_year)) +
    geom_line(show.legend = FALSE, size=1) +
    scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
    theme_bw()+
    labs(x='', y= 'Número de registros por año')

timeline.plot <- datos_carpobrotus %>% 
    add_count(taxon.name, year=year(observed_on), 
              name='records_per_year') %>% 
    ggplot(., aes(x=observed_on, y=records_per_year)) +
    geom_line(show.legend = FALSE, size=1) +
    scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
    theme_bw()+
    labs(x='', y= 'Number of records per year')

  
season.year.plot <- datos_carpobrotus %>% 
    add_count(taxon.name, season, name='records_per_season') %>% 
    mutate(season=factor(season, 
                         levels = c('verano', 'otoño', 'invierno', 'primavera'))) %>%
    ggplot(aes(x=season, y=observed_on)) +  
    geom_jitter(aes(col = season), width = 0.01, show.legend = FALSE) + 
    stat_summary(fun = mean, fun.min = min, fun.max = max) +
    theme_bw() +
    labs(x='', y= 'Fecha')

season.n.plot <- datos_carpobrotus %>% 
    add_count(taxon.name, season, name='records_per_season') %>% 
    mutate(season=factor(season, 
                         levels = c('verano', 'otoño', 'invierno', 'primavera'))) %>% 
    ggplot(aes(x=season, y=records_per_season)) +  
    geom_segment(aes(x=season, xend=season, y=0, 
                     yend=records_per_season, col=season), show.legend = FALSE) +
    geom_point(aes(col=season), show.legend = FALSE) +
    theme_bw() +
    labs(x='', y= 'Número de registros')

timeline.plot / (season.year.plot | season.n.plot)

Code
ggsave(plot = timeline.plot, filename='figs/timeline.plot.svg', device = 'svg', width=6, height=3, dpi=300)

Descarga de todos los datos para Montevideo, Canelones, Maldonado y Rocha

En este caso usamos:

  • Montevideo, Canelones, Maldonado y Rocha como localización: place_id=12416, place_id=12410, place_id=12415, place_id=12420
  • Que no sean de organismos cultivados/captivos: captive=false
  • Que tengangeoprivacy=open

La API sugiere mantener 60 requests per minute (1 solicitud x 1 segundo), así que para descargar estos datos tenemos que generar una pausa en el código para no saturar a la API (delay=1.0). Además, vamos a dividir el resultado en 200 resultados por página (per_page=200).

Problema

De esta manera sólo podemos descargar hasta 10,000 observaciones. Tengo que buscarle la vuelta para controlar el id_above cuando se llega a 10 mil y hacer una nueva llamada con ese id. Ver API Recommended Practices

Code
getAllObservations <- function(place_id){
  
  total_results = NULL
  page = 1 
  delay = 1.0
  results = tibble()
  
  while(is.null(total_results) || nrow(results) < total_results) {
    
    call_url <- str_glue('https://api.inaturalist.org/v1/observations?',
                         'place_id={place_id}',
                         '&captive=false&geoprivacy=open',
                         '&per_page=200&page={page}')
    get_json_call <- GET(url = call_url) %>% 
      content(as = "text") %>% fromJSON(flatten = TRUE)
    
    if (!is.null(get_json_call)) {
      if (is.null(total_results)) {
        total_results = get_json_call$total_results
      }
      results_i <- as_tibble(get_json_call$results) %>% 
        select(taxon.name, taxon.rank, identifications_count, observed_on, 
               geojson.coordinates, positional_accuracy,
               user.login, user.id, user.name, user.observations_count,
               user.identifications_count, user.activity_count, 
               license_code, num_identification_agreements) %>%
        unnest_wider(geojson.coordinates, names_sep = "_") %>%
        rename(longitude=geojson.coordinates_1, latitude=geojson.coordinates_2)
      results <- rbind(results, results_i)
      page <- page + 1
      Sys.sleep(delay)
    }
  }
  return(results)
}

# place_id
montevideo_place_id=12416
canelones_place_id=12410 
maldonado_place_id=12415
rocha_place_id=12420

montevideoObservations <- getAllObservations(montevideo_place_id)
canelonesObservations <- getAllObservations(canelones_place_id)
maldonadoObservations <- getAllObservations(maldonado_place_id)
rochaObservations <- getAllObservations(rocha_place_id)

allObservations <- bind_rows(montevideoObservations, 
                             canelonesObservations,
                             maldonadoObservations,
                             rochaObservations)

# saveRDS(allObservations, 'data/allObservations.rds')

Por ahora vamos a descargar los datos directamente de naturalista.uy/observations/export.

Descarga de datos

Hecha el 12 de marzo, 2023.

Code
allObservations <- read_csv('data/observations-303864.csv', guess_max = 72000)

allObservations <- allObservations %>% 
  filter(coordinates_obscured==FALSE & !is.na(taxon_species_name)) %>% 
  select(kingdom=taxon_kingdom_name, phylum=taxon_phylum_name, 
         class=taxon_class_name, order=taxon_order_name,
         family=taxon_family_name, genus=taxon_genus_name, 
         species=taxon_species_name, scientific_name,
         quality_grade, observed_on, user_login, user_id,
         state_province=place_admin1_name, 
         longitude, latitude)

allObservations_sf <- allObservations %>% 
    st_as_sf(coords = c("longitude", "latitude")) %>% 
    st_set_crs(4326) %>% 
    st_transform(32721)

Análisis espacial

  • Creación de grillas para los cuatro departamentos de la costa (10x10km)
Code
costa_uruguay_grillas <- st_make_grid(st_union(costa_uruguay), 10000) %>%
  st_intersection(st_union(costa_uruguay)) %>% 
  st_sf(gridID=1:length(.), geometry= .) %>% 
  st_make_valid() %>% st_cast() 

Intersección de grillas con datos totales y para Caprobrotus edulis

Code
grillas_allObservations <- st_join(costa_uruguay_grillas,
                                   allObservations_sf) %>% 
  group_by(gridID) %>% 
  summarise(NR=ifelse(n_distinct(species, na.rm=T)!=0, n(), 0),
            SR=n_distinct(species, na.rm = T),
            spsList = paste(species, collapse = ';')) %>% 
  st_cast()

grillas_carpobrotus <- st_join(costa_uruguay_grillas, sf_carpobrotus) %>% 
  group_by(gridID) %>% 
  summarise(NR=ifelse(n_distinct(taxon.name, na.rm=T)!=0, n(), 0)) %>% 
  st_cast()

Estimación del esfuerzo de muestreo por grilla

Usamos la función get_gridsSlopes() generada para análisis de Biodiversidata (Grattarola et al. 2020).

El código está disponible en GitHub: Multiple forms of hotspots of tetrapod biodiversity and the challenges of open-access data scarcity.

To identify the areas of ignorance we quantified the levels of inventory incompleteness for each group by using curvilinearity of smoothed species accumulation curves (SACs). This method assumes that SACs of poorly sampled grids tend towards a straight line, while those of better sampled ones have a higher degree of curvature. As a proxy for inventory incompleteness we calculated the degree of curvilinearity as the mean slope of the last 10% of SACs.

Code
# The function ```get_gridsSlopes``` finds a species accumulation curve (SAC) for each grid-cell using the method ‘exact’ of the function ```specaccum``` of the vegan package and then calculates the degree of curvilinearity as the mean slope of the last 10% of the curve. 

library(vegan)
library(spaa)

get_gridsSlopes <- function(data_abundance){
  gridSlope <- data.frame(gridID=integer(), slope=numeric(), stringsAsFactors=FALSE)
  data_abundance <- as.data.frame(data_abundance) 
  data_abundance$abundance <- as.integer(1)
  cells <- unique(data_abundance$gridID)
  splistT <- list()
  spaccum <- list()
  slope <- list()
  for (i in cells) {
    splist <- data_abundance[data_abundance$gridID == i,c(2:4)]
    splistT[[i]] = data2mat(splist) 
    spaccum[[i]] = specaccum(splistT[[i]], method = "exact")
    slope[[i]] = (spaccum[[i]][[4]][length(spaccum[[i]][[4]])]-
                    spaccum[[i]][[4]][ceiling(length(spaccum[[i]][[4]])*0.9)])/
      (length(spaccum[[i]][[4]])- ceiling(length(spaccum[[i]][[4]])*0.9))
    gridSlope_i <- data.frame(gridID=i, slope=slope[[i]], stringsAsFactors=FALSE)
    gridSlope <- rbind(gridSlope, gridSlope_i) 
  }
  gridSlope <- gridSlope %>% as_tibble() %>% 
    mutate(slope=ifelse(is.nan(slope), NA, slope))
  return(gridSlope)
}

allObservations.SACs <- grillas_allObservations %>% as_tibble() %>% 
    mutate(species=str_split(spsList, ';')) %>% 
    unnest(species) %>% 
    group_by(spsList) %>% mutate(sample = row_number()) %>% 
    ungroup() %>% 
    mutate(sample=ifelse(is.na(species), 0 , sample)) %>% 
    select(gridID, sample, species)

allObservations.incompleteness <- get_gridsSlopes(allObservations.SACs)

Unión espacial de todas los datos por grilla: número de registros totales, número de especies totales, curva de incompleteness (esfuerzo de muestreo), y número de registros de Carpobrotus edulis.

Code
costa_uruguay.incompleteness <- left_join(grillas_allObservations,
          allObservations.incompleteness) %>%
  left_join(., grillas_carpobrotus %>% 
              rename(carpobrotus=NR) %>% 
              st_drop_geometry())

Figuras

  • Mapas
Code
registros.carpobrotus <- ggplot() +
    geom_sf(data=costa_uruguay.incompleteness %>% 
                mutate(carpobrotus=ifelse(carpobrotus==0, NA, carpobrotus)),
            aes(fill=carpobrotus)) +
    scale_fill_fermenter(palette ='YlGn', direction = 1,
                         breaks = c(0,1, 5,10,15,20,25), 
                         na.value="#ede8e8") + 
    geom_sf(data=costa_uruguay, fill=NA) +
    labs(fill='Number of\nCarpobrotus records') +
    theme_bw()

n.registros <- ggplot() +
    geom_sf(data=costa_uruguay.incompleteness %>% 
              mutate(NR=ifelse(NR==0, NA, NR)),
            aes(fill=NR)) +
    scale_fill_fermenter(palette ='YlGn', 
                         direction = 1, 
                         breaks = c(0, 100, 500, 750, 1000, 1500), 
                         na.value="#ede8e8") + 
    geom_sf(data=costa_uruguay, fill=NA) +
    labs(fill='Total\nnumber of\nrecords') +
    theme_bw()

sac <- ggplot() +
    geom_sf(data=costa_uruguay.incompleteness, aes(fill=slope)) +
    scale_fill_fermenter(palette ='Spectral', na.value="#ede8e8") + 
    geom_sf(data=costa_uruguay, fill=NA) +
    labs(fill='Incompleteness') +
    theme_bw()
  
registros.carpobrotus/ n.registros / sac

  • Correlaciones
Code
library(ggpubr)

tabla.final <- costa_uruguay.incompleteness %>% 
    st_drop_geometry() %>% 
    select(grilla=gridID, NR, SR, carpobrotus, slope)

summary(lm(carpobrotus ~ NR, data=tabla.final)) 

Call:
lm(formula = carpobrotus ~ NR, data = tabla.final)

Residuals:
     Min       1Q   Median       3Q      Max 
-12.2200  -0.1682   0.0450   0.1019  24.5596 

Coefficients:
             Estimate Std. Error t value            Pr(>|t|)    
(Intercept) -0.101908   0.161861   -0.63                0.53    
NR           0.007109   0.000530   13.41 <0.0000000000000002 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.422 on 259 degrees of freedom
Multiple R-squared:  0.4099,    Adjusted R-squared:  0.4077 
F-statistic: 179.9 on 1 and 259 DF,  p-value: < 0.00000000000000022
Code
nr.caprobrotus <- tabla.final %>% filter(carpobrotus>0) %>% 
    ggplot(aes(x=NR, y=carpobrotus)) +
    geom_jitter() + 
    geom_smooth(method = 'lm') + 
    stat_regline_equation(label.y = 30, aes(label = ..eq.label..)) +
    stat_regline_equation(label.y = 28, aes(label = ..adj.rr.label..)) +
    labs(x='Number of records per grid cell', y='Caprobrotus edulis records per grid cell') +
    theme_bw()

summary(lm(carpobrotus ~ slope, data=tabla.final)) 

Call:
lm(formula = carpobrotus ~ slope, data = tabla.final)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.5080 -1.5763 -0.3936  1.2652 29.9700 

Coefficients:
            Estimate Std. Error t value       Pr(>|t|)    
(Intercept)    7.350      1.058   6.950 0.000000000155 ***
slope         -8.615      1.455  -5.921 0.000000026430 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.838 on 131 degrees of freedom
  (128 observations deleted due to missingness)
Multiple R-squared:  0.2111,    Adjusted R-squared:  0.2051 
F-statistic: 35.06 on 1 and 131 DF,  p-value: 0.00000002643
Code
incompleteness.caprobrotus <- tabla.final %>% filter(carpobrotus>0) %>% 
    ggplot(aes(x=slope, y=carpobrotus)) +
    geom_point() + geom_smooth(method = 'lm') + 
    stat_regline_equation(label.y = 30, label.x = 0.65, aes(label = ..eq.label..)) +
    stat_regline_equation(label.y = 28, label.x = 0.65, aes(label = ..adj.rr.label..)) +
    labs(x='Sampling incompleteness per grid cell', y='Caprobrotus edulis records per grid cell') +
    theme_bw()

nr.caprobrotus / incompleteness.caprobrotus

Tabla

La tabla final de datos con información por grilla:
- Núm de registros totales: cantidad de registros en NaturalistaUY de cualquier especie
- Núm de especies totales: cantidad de especies en NaturalistaUY
- Núm de registros Carpobrotus: cantidad de registros en NaturalistaUY de Carpobrotus edulis
- Incompleteness: medida de esfuerzo de muestreo (valores cercanos a 0 indican grillas completas)

Code
tabla.final %>% filter(carpobrotus>0) %>% arrange(slope) %>% 
  select(`Núm de registros totales`=NR,
         `Núm de especies`=SR,
         `Núm de registros Carpobrotus`=carpobrotus, 
         Incompleteness=slope) %>% 
  kable(digits = 3)
Núm de registros totales Núm de especies Núm de registros Carpobrotus Incompleteness
1874 802 1 0.228
981 421 10 0.247
1614 714 2 0.253
1483 676 35 0.269
1257 610 7 0.287
1022 498 14 0.302
1093 551 13 0.322
990 488 13 0.327
625 346 3 0.357
862 504 7 0.362
1161 650 3 0.365
568 306 1 0.374
720 423 19 0.390
1145 675 11 0.395
408 240 6 0.407
321 194 4 0.426
125 83 4 0.460
162 105 1 0.468
424 274 8 0.470
315 212 1 0.473
382 253 8 0.485
531 367 3 0.508
201 157 3 0.643
149 120 2 0.668
185 150 4 0.685
101 83 3 0.692
88 78 1 0.791
Code
tm_graticules(alpha = 0.3) +
    tm_shape(costa_uruguay) +
    tm_fill(col='grey90') +
    tm_borders(col='grey60', alpha = 0.4) +
    tm_shape(costa_uruguay.incompleteness %>% select(grilla=gridID, NR, SR, carpobrotus, slope)) +
    tm_fill(col = 'slope', palette = 'Spectral', n = 6, style='jenks')  
Code
tm_graticules(alpha = 0.3) +
    tm_shape(costa_uruguay) +
    tm_fill(col='grey90') +
    tm_borders(col='grey60', alpha = 0.4) +
    tm_shape(costa_uruguay.incompleteness %>% select(grilla=gridID, NR, SR, carpobrotus, slope)) +
    tm_fill(col = 'carpobrotus', palette = 'Greens', n = 6, style='jenks')